#!/usr/bin/env bash
#
# Start `vllm serve` for the Qwen3-Embedding-0.6B checkpoint as a pooling
# runner converted to an embedding model.
#
# Every setting keeps its original value by default and can be overridden
# from the environment without editing this file:
#   EMBED_MODEL_PATH              model directory passed to `vllm serve`
#                                 (default: /data/model/Qwen3-Embedding-0.6B)
#   EMBED_PORT                    value for --port (default: 10001)
#   EMBED_SERVED_MODEL_NAME       value for --served-model-name (default: chat)
#   EMBED_GPU_MEMORY_UTILIZATION  value for --gpu-memory-utilization
#                                 (default: 0.2)
#
# Example:
#   EMBED_PORT=10002 EMBED_GPU_MEMORY_UTILIZATION=0.3 bash <this-script>
#
# The script is kept POSIX-sh compatible (no arrays, no `[[ ]]`) so it still
# works when invoked as `sh <this-script>` or sourced.

# Always exported as 0, regardless of any value inherited from the caller.
# NOTE(review): the reason the FlashInfer sampler is switched off is not
# recorded here — confirm before removing.
export VLLM_USE_FLASHINFER_SAMPLER=0

# Resolve settings: environment override if set and non-empty, otherwise the
# historical hard-coded value.
embed_model_path=${EMBED_MODEL_PATH:-/data/model/Qwen3-Embedding-0.6B}
embed_port=${EMBED_PORT:-10001}
# NOTE(review): the default served name is "chat" even though this is an
# embedding model; it is left unchanged because clients may already request
# the model under that name.
embed_served_model_name=${EMBED_SERVED_MODEL_NAME:-chat}
# NOTE(review): 0.2 presumably leaves GPU memory for other processes sharing
# the device — confirm against the deployment.
embed_gpu_memory_utilization=${EMBED_GPU_MEMORY_UTILIZATION:-0.2}

# All expansions are quoted so paths or names containing spaces are passed to
# vllm as single arguments.
vllm serve "$embed_model_path" \
    --port "$embed_port" \
    --served-model-name "$embed_served_model_name" \
    --runner pooling \
    --convert embed \
    --gpu-memory-utilization "$embed_gpu_memory_utilization"